from urllib.request import urlopen
from bs4 import BeautifulSoup
# Scrape the state's COVID data page and build the absolute URL of the
# "by_Date" workbook that is downloaded and loaded further down.
URL = "https://www.michigan.gov/coronavirus/0,9753,7-406-98163_98173---,00.html"
# Close the HTTP response as soon as the body has been read.
with urlopen(URL) as response:
    HTML = response.read().decode("utf-8")
# Only the part of the page between these two markers is parsed.
start_index = HTML.find("shortdesc")
end_index = HTML.find("footerArea")
# str.find() returns -1 on a miss, which would silently slice the wrong span;
# fall back to the corresponding end of the page instead.
if start_index == -1:
    start_index = 0
if end_index == -1:
    end_index = len(HTML)
data = HTML[start_index:end_index]
soup = BeautifulSoup(data, features="html.parser")
# Anchors without an href attribute return None from .get(); skip them so the
# substring test below cannot raise TypeError.
links = [link.get('href') for link in soup.find_all('a') if link.get('href')]
by_date_links = [i for i in links if "by_Date" in i]
if not by_date_links:
    # Same exception type as the bare [0] lookup, but with a usable message.
    raise IndexError("no 'by_Date' link found on " + URL)
finallink = "https://michigan.gov" + by_date_links[0]
# Download the workbook located by the Python chunk (py$finallink) to a
# temporary file and read it with readxl.
temp <- tempfile()
# mode = "wb": the workbook is a binary file; a text-mode transfer can corrupt
# it on Windows.
download.file(py$finallink, destfile = temp, mode = "wb")
mi_data <- readxl::read_excel(temp)
# Take the data frame read on the R side (exposed through `r`), record the
# most recent "Updated" value, and total the numeric columns per "Date".
mi_data = r.mi_data
max_date = mi_data["Updated"].max()
# numeric_only=True: sum only the numeric columns. Without it, recent pandas
# versions try to sum the non-numeric columns as well (e.g. "Updated") instead
# of dropping them.
agg_data = mi_data.groupby(["Date"], as_index=False).sum(numeric_only=True)
# Bring the Python results into R: the per-date aggregate and the latest
# update date formatted as "day month-abbreviation year".
mi_cases_by_day <- py$agg_data
date_update <- format(py$max_date, "%d %b %Y")
# Preview the first rows of the aggregated data.
head(mi_cases_by_day)
## Date Cases Deaths Cases.Cumulative Deaths.Cumulative
## 1 2020-02-29 19:00:00 14 0 14 0
## 2 2020-03-01 19:00:00 13 1 27 1
## 3 2020-03-02 19:00:00 22 0 49 1
## 4 2020-03-03 19:00:00 24 0 73 1
## 5 2020-03-04 19:00:00 26 0 99 1
## 6 2020-03-05 19:00:00 42 0 141 1
# First look at the data: Cases plotted against Date.
plot_ly(data = mi_cases_by_day, x = ~Date, y = ~Cases)
# Add centred 7-day moving averages of Cases and Deaths.
# fill = NA (rather than 0) leaves the first and last three days, where a full
# window is not available, as missing instead of inventing zero-valued
# averages that drag both ends of the smoothed lines down to 0.
mi_cases_by_day <- mi_cases_by_day %>%
  mutate(
    cases_ma = rollapply(Cases, 7, mean, align = "center", fill = NA),
    deaths_ma = rollapply(Deaths, 7, mean, align = "center", fill = NA)
  )
# Axis settings for the deaths series: a second y axis laid over the primary
# one, placed on the right-hand side, with red tick labels.
ay <- list(
  tickfont = list(color = "red"),
  overlaying = "y",
  side = "right",
  title = "Deaths"
)
# Daily cases (primary y axis) and daily deaths (secondary y axis, `ay`)
# as markers, each overlaid with its 7-day moving average as a line.
plot_ly(mi_cases_by_day, x = ~Date) %>%
  # Cases
  add_trace(y = ~Cases, alpha = .6, name = "Cases", type = "scatter",
            mode = "markers") %>%
  # Cases MA
  # add_lines() sets the line mode itself, so no `mode` argument is passed;
  # the previous mode = 'markers' conflicted with it.
  add_lines(y = ~cases_ma, alpha = .8, name = "Cases MA") %>%
  # Deaths
  add_trace(name = "Deaths", yaxis = "y2", alpha = .15, y = ~Deaths, x = ~Date,
            color = I("red"), type = "scatter", mode = "markers") %>%
  # Deaths MA
  add_lines(name = "Deaths MA", yaxis = "y2", y = ~deaths_ma, x = ~Date,
            line = list(color = I("red")), alpha = .8/4) %>%
  layout(
    title = "Michigan COVID Cases/Deaths<br>With 7-day Moving Average",
    yaxis2 = ay, legend = list(x = 0.6, y = 0.9), margin = list(r = 50, t = 50)
  )
Storytelling and comparison drove what I was after. Here, I’m trying to show how COVID deaths follow COVID cases, and the choices made in this visualization are meant to support that comparison.
Nothing. I wish I could have made cases and deaths more distinguishable on the plot, but plotly made differentiating the two frustrating.
As mentioned before, making the difference between deaths and cases clearer would have been nice.